import os
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.pipeline.interfaces import ProcessorConfig

from unstructured_ingest.v2.processes.connectors.chroma import (
    ChromaAccessConfig,
    ChromaConnectionConfig,
    ChromaUploadStagerConfig,
    ChromaUploaderConfig
)

from unstructured_ingest.v2.processes.connectors.local import (
    LocalIndexerConfig,
    LocalConnectionConfig,
    LocalDownloaderConfig
)

from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
from unstructured_ingest.v2.processes.embedder import EmbedderConfig

if __name__ == "__main__":
    # Assemble and run a single ingest pipeline whose source is the local
    # connector and whose destination is the Chroma connector. All
    # deployment-specific values are read from environment variables;
    # os.getenv returns None for any variable that is unset.
    Pipeline.from_configs(
        context=ProcessorConfig(),
        # Source: the directory named by LOCAL_FILE_INPUT_DIR.
        indexer_config=LocalIndexerConfig(
            input_path=os.getenv("LOCAL_FILE_INPUT_DIR"),
        ),
        downloader_config=LocalDownloaderConfig(),
        source_connection_config=LocalConnectionConfig(),
        # Partitioning is delegated to the API (partition_by_api=True),
        # authenticated with UNSTRUCTURED_API_KEY.
        partitioner_config=PartitionerConfig(
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            # The keyword must be spelled `additional_partition_args` and the
            # option `split_pdf_allow_failed`; the previous misspellings
            # (`additional_partion_args`, `splite_pdf_allow_failed`) meant
            # these PDF-splitting options never took effect.
            additional_partition_args={
                "split_pdf_page": True,
                "split_pdf_allow_failed": True,
                "split_pdf_concurrency_level": 15,
            },
        ),
        chunker_config=ChunkerConfig(chunking_strategy="by_title"),
        embedder_config=EmbedderConfig(embedder_provider="huggingface"),
        # Destination: Chroma, addressed by the CHROMA_* variables.
        destination_connection_config=ChromaConnectionConfig(
            access_config=ChromaAccessConfig(
                settings={"persist_directory": "./chroma-persist"},
                # NOTE(review): "Basic()" looks like a placeholder credential;
                # confirm the real Authorization value before deploying.
                headers={"Authorization": "Basic()"},
            ),
            host=os.getenv("CHROMA_HOST"),
            # NOTE(review): os.getenv yields a string (or None); presumably the
            # config coerces it to the expected port type — confirm.
            port=os.getenv("CHROMA_PORT"),
            tenant=os.getenv("CHROMA_TENANT"),
            database=os.getenv("CHROMA_DATABASE"),
            collection_name=os.getenv("CHROMA_COLLECTION"),
        ),
        stager_config=ChromaUploadStagerConfig(),
        uploader_config=ChromaUploaderConfig(),
    ).run()